import glob
import hashlib
import logging
import re
import threading
import urllib.request

import jieba
from bs4 import BeautifulSoup

from common.macro_data import MacroData
from common.my_http import MyHttp
from database.db_handler import MysqlHander

'''
Macro data: Ministry of Commerce (商务部)
'''

class ShangwubuData:
    """Collect article listings from the Ministry of Commerce website.

    Listing pages under ``http://www.mofcom.gov.cn/article/`` are fetched with
    ``MyHttp.bs4_utf8_data``.  Every ``<li>`` inside a
    ``<ul class="txtList_01">`` supplies a date (its ``<span>`` text), a title
    (its ``<a>`` text) and an absolute URL (site root + the ``<a>`` href),
    which are passed to ``MacroData.add_macro_data``.
    """

    _log = logging.getLogger(__name__)

    _SITE = "http://www.mofcom.gov.cn"
    _ZCFB_ROOT = _SITE + "/article/zcfb/"
    _YJTS_ROOT = _SITE + "/article/yjts/"
    _LIST_CLASS = "txtList_01"

    # Sections below /article/zcfb/ that get_data() reads with v1().
    _ZCFB_TAGS = (
        "zcblgg",
        "zcgfxwj",
        "zczh",
        "zcwwmwwz",
        "zcgnmy",
        "zcdwmy",
        "zcwgtz",
        "zcjjhz",
        "zcfwmy",
    )
    # Paths below /article/yjts/ ("" is the section root) that get_data()
    # reads with v2().
    _YJTS_PATHS = ("", "yjjwfx/", "yjshmybl/", "yjgwdhmyjjdc/")

    def __init__(self):
        self.macro = MacroData()

    @staticmethod
    def _clean_date(raw, max_len=None):
        """Strip the square brackets from a listing date.

        Args:
            raw: Text of the ``<span>`` element, e.g. ``"[2021-03-04]"``.
            max_len: When given, keep only the first ``max_len`` characters
                of the bracket-free text.

        Returns:
            The cleaned date string.
        """
        text = raw.replace("[", "").replace("]", "")
        return text if max_len is None else text[:max_len]

    def _store_item(self, li, date_len=None):
        """Parse one ``<li>`` and hand it to ``self.macro.add_macro_data``.

        Args:
            li: A listing element exposing BeautifulSoup's ``find``.
            date_len: Forwarded to ``_clean_date`` as ``max_len``.

        Returns:
            True when the entry was stored, False when the element has no
            ``<a href=...>`` or no ``<span>`` and was skipped.
        """
        link = li.find("a")
        stamp = li.find("span")
        href = link.attrs.get("href") if link is not None else None
        if href is None or stamp is None:
            self._log.debug("skipping <li> without link or date: %r", li)
            return False
        # NOTE(review): the meaning of the constant 100 is defined by
        # MacroData.add_macro_data, which is not visible here -- confirm.
        self.macro.add_macro_data(
            self._clean_date(stamp.text, date_len),
            100,
            link.text,
            self._SITE + href,
            "商务部",
        )
        return True

    def lishi_d1(self, tag, pages=range(2, 10)):
        """Read the paginated listing pages of one ``zcfb`` section via v1().

        Presumably the back-fill of older ("lishi") pages -- confirm.

        Args:
            tag: Section name below ``/article/zcfb/`` (e.g. ``"zcblgg"``).
            pages: Page numbers appended as ``/?<n>``; defaults to 2..9.
        """
        for page in pages:
            self.v1(self._ZCFB_ROOT + tag + "/?" + str(page))

    def v1(self, url):
        """Store every entry of the first ``ul.txtList_01`` list on ``url``.

        The date is stored as the bracket-free ``<span>`` text, untruncated.
        A page without such a list is skipped with a warning, and an entry
        without a link or date is skipped, so one odd page no longer aborts
        the whole run.  Errors raised by ``add_macro_data`` still propagate.
        """
        soup = MyHttp.bs4_utf8_data(url)
        listing = soup.find(name="ul", attrs={"class": self._LIST_CLASS})
        if listing is None:
            self._log.warning(
                "no <ul class=%r> on %s; page skipped", self._LIST_CLASS, url
            )
            return
        for li in listing.find_all(name="li"):
            self._store_item(li)

    def v2(self, url):
        """Store the entries of every ``ul.txtList_01`` list on ``url``.

        The date is cut to its first 10 characters.  Processing is
        best-effort per entry: any failure while parsing or storing one
        entry is logged at debug level and the loop continues.
        """
        soup = MyHttp.bs4_utf8_data(url)
        for listing in soup.find_all(name="ul", attrs={"class": self._LIST_CLASS}):
            for li in listing.find_all(name="li"):
                try:
                    self._store_item(li, 10)
                except Exception:
                    # Deliberately broad: the original skipped an entry on
                    # any error; keep that, but leave a trace.
                    self._log.debug(
                        "entry on %s could not be stored", url, exc_info=True
                    )

    def get_data(self):
        """Read the first listing page of every configured section.

        The ``zcfb`` sections go through v1() and the ``yjts`` paths through
        v2().  Later pages are not fetched here: ``lishi_d1(tag)`` requests
        pages ``?2``..``?9`` of a ``zcfb`` section, and a ``yjts`` URL with a
        ``?<n>`` suffix can be passed to v2() directly.
        """
        for tag in self._ZCFB_TAGS:
            self.v1(self._ZCFB_ROOT + tag + "/")
        for path in self._YJTS_PATHS:
            self.v2(self._YJTS_ROOT + path)

def shangwubu_api():
    """Build a ShangwubuData collector and run its get_data() once."""
    ShangwubuData().get_data()

# Run one collection pass when this module is executed as a script.
if __name__ == "__main__":
    shangwubu_api()
